This project compares and classifies gravel bike frames.

Notes on the project

  1. The project uses 2D scatterplots of frame measures to compare all bikes in the database across all sizes and across all frames that are spec’d to my size (generally M or 56, but this varies among makes and models). There is noise in size charts, which adds some arbitrariness to the precise location of a model, especially for the length measures that vary across frame sizes.
  2. The project uses hierarchical trees to give a sense of frame geometry similarity. There are many subjective decisions in tree building. so do not take these as objective or even fixed. The clustering is pretty stable but not rigid – adding a bike can occasionally move a frame from one cluster to another!
  3. The project uses the hierarchical trees to divide the frames spec’d to my size into three classes: “gravel-race”, “grave-endurance”, and “gravel-trail”. There are multiple subjective decisions on the workflow to this classification.
  4. The 2D scatterplots show the bikes on the boundary between different classes and why new data or different tree algorithms can create slightly different classifications.

Notes on data

  1. All bike data were taken from manufacturer’s web sites. Some missing data were computed based on other measures or taken from online reviews.
  2. bikinsights.com, geometrygeeks.bike and 99spokes.com are invaluable sites. This project offers a different way of comparing frames.

Some bike geometry links:

The bike geometry Bible - Everything you need to know about the shape of your bike

Frame Geometry Masterclass: Does The Evil Chamois Hagar Make ANY Sense?

MATTER of FACT: How to Understand Gravel Bike Geometry

knitr::opts_chunk$set(echo = TRUE,
                      message = FALSE,
                      warning = FALSE,
                      knitr.kable.NA = '')
# wrangling packages
library(here) # here makes a project transportable
library(janitor) # clean_names
library(readxl) # read excel, duh!
library(data.table) # magical data frames
library(magrittr) # pipes
library(stringr) # string functions
library(forcats) # factor functions

# analysis packages
library(emmeans) # the workhorse for inference
library(nlme) # gls and some lmm
library(lme4) # linear mixed models
library(lmerTest) # linear mixed model inference
library(afex) # ANOVA linear models
library(glmmTMB) # generalized linear models
library(MASS) # negative binomial and some other functions
library(car) # model checking and ANOVA
library(DHARMa) # model checking
library(mvtnorm)

# graphing packages
library(ggsci) # color palettes
library(ggpubr) # publication quality plots
library(ggforce) # better jitter
library(cowplot) # combine plots
library(knitr) # kable tables
library(kableExtra) # kable_styling tables
library(ggdendro) # dendrogram
library(dendextend) # better dendrogram
library(ggiraph)
library(GGally)

# ggplot_the_model.R packages not loaded above
library(insight)
library(lazyWeave)

# use here from the here package
here <- here::here
# use clean_names from the janitor package
clean_names <- janitor::clean_names
# use transpose from data.table
transpose <- data.table::transpose

# load functions used by this text written by me
# ggplot_the_model.R needs to be in the folder "R"
# if you didn't download this and add to your R folder in your
# project, then this line will cause an error
#source_path <- here("R", "ggplot_the_model.R")
#source(source_path)

data_folder <- "data"
image_folder <- "images"
output_folder <- "output"

1 Functions

1.1 General

deg_2_rad <- function(x){
  rad <- x*pi/180
  return(rad)
}

1.2 Bike Geometry

get_axle_crown <- function(){
  
}

get_chainstay_h <- function(chainstay_length, 
                            bottom_bracket_drop){
  # the horizontal component of chainstay length 
  # bbd = bottom bracket drop
  # csl = chainstay length
  chainstay_h <- sqrt(chainstay_length^2 - bottom_bracket_drop^2)
  return(chainstay_h)
}


get_rake_h <- function(offset, hta){
  # the horizontal component of fork offset
  rake_h <- offset/sin(deg_2_rad(hta))
  return(rake_h)
}

get_ht_h <- function(hta, htl){
  # the horizontal component of head_tube
  # hta = head tube angle
  # htl = head tube length
  ht_h <- htl*cos(deg_2_rad(hta))
  return(ht_h)
}
get_ht_v <- function(hta, htl){
  # the vertical component of head_tube
  # hta = head tube angle
  # htl = head tube length
  ht_v <- htl*sin(deg_2_rad(hta))
  return(ht_v)
}


get_fork_angle <- function(offset, axle_crown, head_tube_angle){
  # angle of fork axle-crown axis to horizontal
  # beta is angle of fork axle-crow to offset line
  beta <- acos(offset/axle_crown)*180/pi
  # delta is angle from offset line to horizontal
  delta <- 90 - head_tube_angle
  fork_angle <- beta - delta
  return(rake_h)
}
# Solace OM3 does not specify head tube length. This can be
# computed using specs of Whisky MCX fork assuming this is
# the fork used to spec wheelbase
head_tube_length <- function(axle_crown, rake, stack, wheelbase){
  rake_h <- get_rake_h(geobike[, fork_offset_rake],
                       geobike[, head_tube_angle])
  fork_angle <- get_fork_angle(geobike[, fork_offset_rake],
                               geobike[, fork_axle_crown],
                               geobike[, head_tube_angle])
}
# Vagabond Genesis does not specify chainstay length.
get_chainstay_length <- function(rake, reach, stack, wheelbase,
                                 hta, htl, bbd){
  head_tube_h <- get_ht_h(hta, htl)
  head_tube_v <- get_ht_v(hta, htl)
  fork_v <- stack -  bbd -
    head_tube_v
  
  fork_h1 = fork_v/tan(deg_2_rad(hta))
  rake_h <- get_rake_h(rake,
                       hta)
  chainstay_h <- wheelbase - reach - head_tube_h - fork_h1 - 
    rake_h
  
  chainstay <- sqrt(chainstay_h^2 + bbd^2)
  
  return(chainstay)
}
get_fork_offset <- function(stack, reach, head_tube_angle, chainstay_length, bottom_bracket_drop, wheelbase){
  # steer_axis_h is base of triangle from top-head-tube to vertex created by steering axis and wheelbase.
  # tan hta <- stack/steer_axis_h
  steer_axis_v <- stack - bottom_bracket_drop
  steer_axis_h <- steer_axis_v /
    tan(deg_2_rad(head_tube_angle))
  chainstay_h <- get_chainstay_h(chainstay_length,
                                 bottom_bracket_drop)
  rake_h <- wheelbase - chainstay_h - reach - steer_axis_h
  rake <- rake_h * sin(deg_2_rad(head_tube_angle))
  return(rake)
}
get_effective_top_tube_length <- function(stack,
                                          reach,
                                          seat_tube_angle){
  # amigo bug out is missing this
  #
  seat_h <- stack/tan(deg_2_rad(seat_tube_angle))
  effective_top_tube_length <- seat_h + reach
  return(effective_top_tube_length)
}
geom_checker <- function(chainstay_length, # chainstay length
                         bottom_bracket_drop, # bottom bracket drop
                         reach,
                         stack,
                         head_tube_angle, # head tube angle
                         rake, # head tube length
                         wheelbase){ # wheelbase
  # do all the horizontal components add to wheelbase?
  chainstay_length_h <- get_chainstay_h(chainstay_length,
                                        bottom_bracket_drop)
  steer_axis_v <- stack - bottom_bracket_drop
  steer_axis_h <- steer_axis_v /
    tan(deg_2_rad(head_tube_angle))

  rake_h <- get_rake_h(rake,
                       head_tube_angle)
  wheelbase_computed <- chainstay_length_h + reach +
    steer_axis_h + rake_h

  }

1.3 Importer

# data_path <- here(data_folder, "ghost_grappler.txt")
# dt <- fread(data_path)
# bike_label = "Tumbleweed Stargazer 2022"
# bike_range = "b1:h21"

read_bike <- function(bike_label = "Breezer Radar X Pro 2022",
                      bike_range = "B1:I19"){
  data_file <- "bikes.xlsx"
  data_path <- here(data_folder, data_file)
  bike_wide <- read_excel(data_path,
                          sheet = bike_label,
                          range = bike_range) %>%
    data.table
  # re-read with coltype = numeric
  # col_type_list <- c("text", "text", rep("numeric", ncol(bike_wide)-2))
  # bike_wide <- read_excel(data_path,
  #                         sheet = bike_label,
  #                         range = bike_range,
  #                         col_types = col_type_list) %>%
  #   data.table
  
  
  bike_model <- substr(bike_label, 1, nchar(bike_label) - 5)
  model_year <- substr(bike_label,
                       nchar(bike_label) - 4,
                       nchar(bike_label))
  bike_wide <- bike_wide[, -2]
  bike <- data.table(
    model = bike_model,
    year = model_year,
    transpose(bike_wide,
              keep.names = "frame_size",
              make.names = 1)
  )
  keep_names <- c("model","frame_size", "seat_tube_length", "top_tube_effective_length", "head_tube_length", "seat_tube_angle", "head_tube_angle", "chainstay_length", "wheelbase", "bottom_bracket_drop", "fork_offset_rake", "stack", "reach", "standover", "stem_length", "handlebar_width", "crank_length", "wheel_size", "tire_width_spec", "tire_width_max")
  bike <- bike[, .SD, .SDcols = keep_names]
  
  # fill in missing
    # chainstay_length
  bike[, chainstay_length := 
         ifelse(is.na(chainstay_length),
                get_chainstay_length(fork_offset_rake,
                                     reach,
                                     stack,
                                     wheelbase,
                                     head_tube_angle,
                                     head_tube_length,
                                     bottom_bracket_drop),
                chainstay_length)]
    # fork_offset_rake
  bike[, fork_offset_rake := 
         ifelse(is.na(fork_offset_rake),
                get_fork_offset(stack,
                                reach,
                                head_tube_angle,
                                chainstay_length,
                                bottom_bracket_drop,
                                wheelbase),
                fork_offset_rake)]
  # top_tube_effective_length
  bike[, top_tube_effective_length := 
         ifelse(is.na(top_tube_effective_length),
                get_effective_top_tube_length(stack,
                                              reach,
                                              seat_tube_angle),
                top_tube_effective_length)] 
  
  # constructed measures
  bike[, model_size := paste(model, frame_size)]
  bike[, rear_center := sqrt(chainstay_length^2 - bottom_bracket_drop^2)] # horizontal
  bike[, front_center := wheelbase - rear_center] # horizontal
  bike[, seat_center := stack/tan(deg_2_rad(seat_tube_angle))]
  
  # ratios
  bike[, stack_reach := stack/reach]
  bike[, front_rear := front_center/rear_center]
  bike[, rear_wheelbase := rear_center/wheelbase]
  bike[, front_wheelbase := front_center/wheelbase]
  bike[, sta_hta := seat_tube_angle/head_tube_angle]

  # decompositions
  # seat_tube_v and seat_tube_h are decomp of seat tube
  bike[, seat_tube_v := seat_tube_length *
         sin(deg_2_rad(seat_tube_angle))]
  bike[, seat_tube_h := seat_tube_length *
         cos(deg_2_rad(seat_tube_angle))]
  # seat_v and seat_h are decomp of seat positioned at stack height
  # tan(STA) = seat_h/seat_v
  bike[, seat_v := stack]
  bike[, seat_h := stack /
         tan(deg_2_rad(seat_tube_angle))]
  # head_v and head_h are decomp of head tube
  bike[, head_v := head_tube_length * sin(deg_2_rad(head_tube_angle))]
  bike[, head_h := head_tube_length * cos(deg_2_rad(head_tube_angle))]

  # landmarks with rear axle as origin
  bike[, x1 := 0] # rear axle
  bike[, y1 := 0]
  bike[, x2 := rear_center - seat_h] # seat at stack height
  bike[, y2 := stack - bottom_bracket_drop]
  bike[, x3 := rear_center + reach] # head tube top
  bike[, y3 := stack - bottom_bracket_drop]
  bike[, x4 := x3 + head_h] # head tube base
  bike[, y4 := y3 - head_v]
  bike[, x5 := wheelbase] # front axle
  bike[, y5 := 0]
  bike[, x6 := rear_center] # bottom bracket
  bike[, y6 := -bottom_bracket_drop]
  bike[, x7 := rear_center - seat_tube_h] # seat tube
  bike[, y7 := seat_tube_v]
  
  # landmarks_named
  bike[, rear_x := x1]
  bike[, rear_y := y1]
  bike[, seat_x := x2]
  bike[, seat_y := y2]
  bike[, head_x := x3]
  bike[, head_y := y3]
  bike[, crown_x := x4]
  bike[, crown_y := y4]
  bike[, front_x := x5]
  bike[, front_y := y5]
  bike[, bottom_x := x6]
  bike[, bottom_y := y6]
  bike[, seattube_x := x7]
  bike[, seattube_y := y7]
  
  return(bike)
}

1.4 Import

data_path <- here(data_folder, "bike_list.txt")
bike_list <- fread(data_path)
geobike <- data.table(NULL)
for(i in 1:nrow(bike_list)){
  bike_label_i <- as.character(bike_list[i, "model"])
  bike_range_i <- as.character(bike_list[i, "data_range"])
  bike_i <- read_bike(bike_label = bike_label_i,
                      bike_range = bike_range_i)
  bike_i[, my_fit := ifelse(frame_size == c(bike_list[i, "my_fit"]), TRUE, FALSE)]
  geobike <- rbind(geobike, bike_i)
}

# my_fit: use 176 cm (I am 175.5)
# add Breezer small to my_fit
# geobike[model == "Breezer Radar X Pro" & frame_size == "48cm (S)", my_fit := TRUE]
# add Boone 54 to my_fit
# geobike[model == "Trek Boone 6" & frame_size == "54 cm", my_fit := TRUE]


# add column of shape id for plots
shape_list <- c(15,17,19,0,2)
n_shapes <- length(shape_list)
n_models <- length(unique(geobike[, model]))
n_recycles <- floor(n_models/n_shapes)
left_over <- n_models - n_recycles*n_shapes
model_2_shape_map <- c(rep(shape_list, n_recycles), shape_list[1:left_over])
geobike[, shape_id := model_2_shape_map[as.integer(as.factor(model))]]

1.5 Center landmarks at bottom bracket

y_cols <- c("rear_x", "rear_y",
            "seat_x", "seat_y",
            "head_x", "head_y",
            "crown_x", "crown_y",
            "front_x", "front_y",
            "bottom_x", "bottom_y",
            "seattube_x", "seattube_y")

# center X at bottom bracket
geobike[, rear_x := rear_x - bottom_x]
geobike[, seat_x := seat_x - bottom_x]
geobike[, head_x := head_x - bottom_x]
geobike[, crown_x := crown_x - bottom_x]
geobike[, front_x := front_x - bottom_x]
geobike[, bottom_x := bottom_x - bottom_x]
geobike[, seattube_x := seattube_x - bottom_x]

2 Frame size classification – Initial

Three measures of frame size are computed

  1. \(\texttt{stack_reach_size_geomean}\) is the geometric mean of stack and reach.
  2. \(\texttt{rider_size}\) is the geometric mean of \(\texttt{seat_tube_effective_length}\) and \(\texttt{top_tube_effective_length}\). \(\texttt{seat_tube_effective_length}\) is the size component related to the rider’s leg length. \(\texttt{top_tube_effective_length}\) is the size component related to the rider’s torso and arm length.
  3. \(\texttt{centroid_size}\) of the three vertices of the front triangle created by the top of the virtual seat tube, the top of the head tube, and the bottom bracket.
# stack + reach size
geobike[, stack_reach_size_euclid := sqrt(stack^2 + reach^2)]
geobike[, stack_reach_size_geomean := sqrt(stack * reach)]

# effective seat tube + effective top tube size
geobike[, seat_tube_effective_length :=
          sqrt((seat_x - bottom_x)^2 + (seat_y - bottom_y)^2)]
geobike[, rider_size := sqrt(seat_tube_effective_length * 
                               top_tube_effective_length)]

# upper triangle centroid size
geobike[, centroid_x := (seat_x + bottom_x + head_x)/3]
geobike[, centroid_y := (seat_y + bottom_y + head_y)/3]
geobike[, centroid_size := 
          sqrt((seat_x - centroid_x)^2 +
          (seat_y - centroid_y)^2 +
          (bottom_x - centroid_x)^2 +
          (bottom_y - centroid_y)^2 +
          (head_x - centroid_x)^2 +
          (head_y - centroid_y)^2)]

# bike centroid size
geobike[, bike_centroid_x := (rear_x + seat_x + head_x + crown_x + front_x + bottom_x)/3]
geobike[, bike_centroid_y := (rear_y + seat_y + head_y + crown_y + front_y + bottom_y)/3]
geobike[, bike_centroid_size := 
          sqrt(
            (rear_x - bike_centroid_x)^2 +
              (rear_y - bike_centroid_y)^2 +
              (seat_x - bike_centroid_x)^2 +
              (seat_y - bike_centroid_y)^2 +
              (head_x - bike_centroid_x)^2 +
              (head_y - bike_centroid_y)^2 +
              (crown_x - bike_centroid_x)^2 +
              (crown_y - bike_centroid_y)^2 +
              (front_x - bike_centroid_x)^2 +
              (front_y - bike_centroid_y)^2 +
              (bottom_x - bike_centroid_x)^2 +
              (bottom_y - bike_centroid_y)^2
          )]
size <- "bike_centroid_size"
size <- geobike[, get(size)]
c.x <- geobike[, bike_centroid_x]
c.y <- geobike[, bike_centroid_y]

# do not scale
# size <- 1
# c.x <- 0
# c.y <- 0

# centroid size based on seat/headtube/bottom bracket triangle
geobike[, rear_xs := (rear_x - c.x)/size]
geobike[, rear_ys := (rear_y - c.y)/size]
geobike[, seat_xs := (seat_x - c.x)/size]
geobike[, seat_ys := (seat_y - c.y)/size]
geobike[, head_xs := (head_x - c.x)/size]
geobike[, head_ys := (head_y - c.y)/size]
geobike[, crown_xs := (crown_x - c.x)/size]
geobike[, crown_ys := (crown_y - c.y)/size]
geobike[, front_xs := (front_x - c.x)/size]
geobike[, front_ys := (front_y - c.y)/size]
geobike[, bottom_xs := (bottom_x - c.x)/size]
geobike[, bottom_ys := (bottom_y - c.y)/size]
geobike[, seattube_xs := (seattube_x - c.x)/size]
geobike[, seattube_ys := (seattube_y - c.y)/size]
my_fit <- geobike[my_fit == TRUE,]

shape_map <- setNames(geobike$shape_id, geobike$model)

nudge_percent <- 0.01
gg1 <- ggplot(data = geobike,
             aes(x = centroid_size,
                 y = rider_size,
                 color = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size,
                             shape = model),
                         show.legend = FALSE) +
  scale_shape_manual(values = shape_map)

nudge_pos <- nudge_percent*(max(my_fit$stack_reach_size_geomean) - min(my_fit$stack_reach_size_geomean))

gg2 <- ggplot(data = geobike,
             aes(x = stack_reach_size_geomean,
                 y = centroid_size,
                 color = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size,
                             shape = model),
                         show.legend = FALSE) +
  scale_shape_manual(values = shape_map)

nudge_pos <- nudge_percent*(max(my_fit$stack_reach_size_geomean) - min(my_fit$stack_reach_size_geomean))

gg3 <- ggplot(data = my_fit,
             aes(x = stack_reach_size_geomean,
                 y = rider_size,
                 color = model,
                 label = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE)
girafe(ggobj = gg1)

Figure 2.1: Hover over points to identify model and frame size

girafe(ggobj = gg2)

Figure 2.2: Hover over points to identify model and frame size

girafe(ggobj = gg3)

Figure 2.3: Hover over points to identify model and frame size

The goal here is to use the frame size measures to classify the bikes into size classes. First, here is the number of bike models that offer a specific number of frame sizes.

frame_sizes_per_model <- geobike[, .(n_sizes = .N), by = .(model)]
size_dist <- frame_sizes_per_model[, .(n_models = .N), by = .(n_sizes)]
ggplot(data = size_dist,
       aes(x = n_sizes,
           y = n_models)) +
  geom_col() +
  ylab("Number of models") +
  xlab("Number of frame sizes") +
  theme_pubr()
Distribution of bike models that offer a specific number of frame sizes

Figure 2.4: Distribution of bike models that offer a specific number of frame sizes

Use k-means clustering to classify into five size classes and seven size classes. The three frame size variables are the inputs.

y_cols <- c("stack_reach_size_geomean", "rider_size", "centroid_size")

y_cols <- "centroid_size"

# 5 sizes
sizes <- c("extra-small", "small", "medium", "large", "extra-large")
n_sizes <- length(sizes)
size_groups <- kmeans(x = geobike[, .SD, .SDcols = y_cols],
                                  centers = n_sizes)
sizing <- size_groups$cluster
geobike[, size_cluster_5 := sizing]
cluster_means <- geobike[, .(cluster_mean = mean(stack_reach_size_geomean)),
                         by = .(size_cluster_5)] %>%
  dplyr::arrange(cluster_mean) %>%
  data.table()
cluster_means[, sizes := sizes]
cluster_means <- dplyr::arrange(cluster_means, size_cluster_5)
geobike[, frame_size_5 := cluster_means$sizes[size_cluster_5]]
geobike[, frame_size_5 := factor(frame_size_5,
                                 levels = sizes)]

# 7 sizes
sizes <- c("extra-small", "small", "small-medium", "medium", "medium-large", "large", "extra-large")
n_sizes <- length(sizes)
size_groups <- kmeans(x = geobike[, .SD, .SDcols = y_cols],
                                  centers = n_sizes) 
sizing <- size_groups$cluster
geobike[, size_cluster_7 := sizing]
cluster_means <- geobike[, .(cluster_mean = mean(stack_reach_size_geomean)),
                         by = .(size_cluster_7)] %>%
  dplyr::arrange(cluster_mean) %>%
  data.table()
cluster_means[, sizes := sizes]
cluster_means <- dplyr::arrange(cluster_means, size_cluster_7)
geobike[, frame_size_7 := cluster_means$sizes[size_cluster_7]]
geobike[, frame_size_7 := factor(frame_size_7,
                                 levels = sizes)]
y_cols <- c("model", "frame_size", "frame_size_5", "frame_size_7")
#y_cols <- c("model", "frame_size", "frame_size_7")
# View(geobike[, .SD, .SDcols = y_cols])
gg1 <- ggplot(data = geobike,
              aes(x = frame_size_5,
                  y = top_tube_effective_length,
                  color = model,
                  shape = model)) + 
  geom_jitter_interactive(aes(tooltip = model_size,
                              data_id = model_size),
                          width = 0.2,
                          show.legend = FALSE) +
  scale_shape_manual(values = shape_map) +
  ylab("Top Tube, Effective Length (mm)")


gg2 <- ggplot(data = geobike,
              aes(x = frame_size_7,
                  y = top_tube_effective_length,
                  color = model,
                  shape = model)) + 
  geom_jitter_interactive(aes(tooltip = model_size,
                              data_id = model_size),
                          width = 0.2,
                          show.legend = FALSE) +
  scale_shape_manual(values = shape_map) +
  ylab("Top Tube, Effective Length (mm)")

girafe(ggobj = gg1)

Figure 2.5: Hover over points to identify model and frame size

# girafe(ggobj = gg2)

Notes

  1. Because the bikepacking/off-road bikes have extra high stack and/or extra long reach, the only extra-large bikes are bikepacking/off-road models and all of the all-road/race gravel bikes are classified into smaller bins then their specified size.
  2. This suggests re-classifying within style classifications.

3 Style classification

treed <- function(geobike_subset,
                  y_cols,
                  scale_it = TRUE,
                  center_it = TRUE,
                  hclust_method = "ward.D2"
){
  dd <- dist(scale(geobike_subset[, .SD, .SDcols = y_cols],
                   center = center_it,
                   scale = scale_it),
             method = "euclidean")
  dendro <- hclust(dd, method = hclust_method) %>%
    as.dendrogram() %>%
    place_labels(paste(geobike_subset[, model],
                       geobike_subset[, frame_size],
                       sep = ", "))
  
  return(dendro)
  # gg <- ggdendrogram(dendro)
  # return(gg)
}

3.1 Geometric frame shape

var_labels <- c("Rear wheel X", "Rear wheel Y",
                "Seat at stack height, X",
                "Head tube X", "Head tube Y",
                "Fork crown X", "Fork crown Y",
                "Front wheel X", "Front wheel Y",
                "Bottom bracket X", "Bottom bracket Y")
data.table(
  Coordinates = var_labels
) %>%
  kable() %>%
  kable_styling(full_width = FALSE)
Coordinates
Rear wheel X
Rear wheel Y
Seat at stack height, X
Head tube X
Head tube Y
Fork crown X
Fork crown Y
Front wheel X
Front wheel Y
Bottom bracket X
Bottom bracket Y
y_cols <- c("rear_xs", "rear_ys",
            # seat_ys is redundant with head_ys
            "seat_xs",
            "head_xs", "head_ys",
            "crown_xs", "crown_ys",
            "front_xs", "front_ys",
            "bottom_xs", "bottom_ys")
geobike_subset <- geobike[my_fit == TRUE,]
scale_it <- FALSE
center_it <- FALSE
dendro <- treed(geobike_subset,
            y_cols,
            scale_it,
            center_it,
            hclust_method = "average")
gg <- ggdendrogram(dendro)
 
gg

Notes

  1. Method – UPGMA method using landmark coordinates centered at the frame centroid and scaled by frame centroid size, for frames spec’d to my size.

3.2 Traditional measures

y_cols <- c("stack", "reach", "front_center", "rear_center", "bottom_bracket_drop", "fork_offset_rake", "head_tube_angle", "seat_tube_angle")
var_labels <- c("Stack", "Reach",
                "Front-center horizontal",
                "Rear-center horizontal",
                "Bottom bracket drop",
                "Fork offset",
                "Head tube angle",
                "Seat tube angle")
data.table(
  Variables = var_labels
) %>%
  kable() %>%
  kable_styling(full_width = FALSE)
Variables
Stack
Reach
Front-center horizontal
Rear-center horizontal
Bottom bracket drop
Fork offset
Head tube angle
Seat tube angle
y_cols <- c("stack", "reach", "front_center", "rear_center", "head_tube_angle", "seat_tube_angle", "bottom_bracket_drop", "fork_offset_rake")

geobike_subset <- geobike[my_fit == TRUE,]
scale_it <- TRUE
center_it <- TRUE
dendro_init_full <- treed(geobike_subset,
                y_cols,
                scale_it,
                center_it,
                hclust_method = "ward.D2") #ward.D2
gg <- ggdendrogram(dendro_init_full)
 
gg

Notes

  1. Method – Ward’s method using centered/scaled measures of frames spec’d for my height
  2. Three major clusters, from left to right
  • trail: drop-bar mtn bikes and flat-bar gravel bikes
  • all-road and race gravel
  • bikepacking

3.3 Style classification table

Using the traditional-measures tree above, the frames spec’d to my size can be classified into the three styles: All-road, Bikepacking, Trail

options(knitr.kable.NA = '')
n_clusters <- 3 
class_dendro <- cutree(dendro_init_full, k = n_clusters)
cluster_labels <- c("Trail", "Bikepacking", "All-road")
labels <- str_split_fixed(names(class_dendro), ",", 2)[,1]
style_class <- data.table(
  model = labels,
  style = cluster_labels[class_dendro]
)

# add style to geobike
geobike <- plyr::join(geobike, style_class, by = "model")
my_fit <- geobike[my_fit == TRUE,]

# dcast(setDT(DF), rowid(ID) ~ ID, value.var = "total")
cluster_labels <- c("All-road", "Bikepacking", "Trail")

style_table <-dcast(setDT(style_class), rowid(style) ~ style, value.var = "model")[, .SD, .SDcols = cluster_labels]

style_table %>%
  kable() %>%
  kable_styling(full_width = FALSE)
All-road Bikepacking Trail
Trek Boone 6 Mason InSearchOf Breezer Radar X Pro
Trek Checkpoint SL5 Tumbleweed Stargazer Evil Chamois Hagar GRX
Ribble Gravel SL Tout Terrain Scrambler 28 BMC URS One
Lauf Siegla Ritchey Outback frameset Surly Ghost Grappler
No22 Drifter X Bombtrack Beyond 2 Knolly Cache Steel
Niner RLT 9 RDO Salsa Vaya Specialized Diverge Evo
OPEN U.P. Salsa Fargo front dropout Marin DSX 2
Thesis OB1 Salsa Fargo rear dropout Whyte Friston Gravel
All-City Gorilla Monsoon Kona Sutra ULTD Enigma Escape Flat-bar
All-City Cosmic Stallion Cinelli Hobootleg Geo Merida Silex
Chumba Terlingua steel fdo Noble GX 5 Fiftyone Assassin long-low
Shand Stooshie BlackMtnCy Monstercross V5 Fiftyone Assassin short-hi
Salsa Warbird BlackMtnCy La Cabra Revel Rover
Pinarello Grevil F Salsa Cutthroat Kanzo Adventure New
Canyon Grail 7 1by Moots Routt ESC Bombtrack Beyond+ Adv
Canyon Grizl 7 1by Light Blue Darwin Amigo Bug Out
Obed Boundary Reeb Sams Pants Hudski Doggler Gravel
Solace OM-3 Short Genesis Vagabond Sonder Camino AL
Santa Cruz Stigmata Specialized Diverge Otso Fenrir
Why R+ V4 Bearclaw Beaux Jaxon Cotic Cascade
Bombtrack Hook Chiru Kegeti Chumba Yaupon
Squid Gravtron Mosaic GT-1X BMC URS AL
Alchemy Rogue Panorama Taiga EXP BMC URS AL SUS
Otso Warakin Stainless Nordest Kutxo
Blackheart All Road TI
Cervelo Aspero

4 Pairwise

4.1 Stack and Reach

Notes

  1. Stack and reach are the most common quick & dirty measure of frame size. These are imperfect measures of frame size because both measures are confounded by bike style – more bike-packing and mountain-bike inspired (“trail”) gravel bikes have high stack or long reach, or both, for their specified size class relative to all-road gravel bikes of the same size class.
gg1 <- ggplot(data = geobike,
             aes(x = stack,
                 y = reach,
                 color = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size,
                             shape = model),
                         show.legend = FALSE) +
   scale_shape_manual(values = shape_map)

nudge_pos <- nudge_percent*(max(my_fit$stack) - min(my_fit$stack))

gg2 <- ggplot(data = my_fit,
             aes(x = stack,
                 y = reach,
                 color = model,
                 label = model)) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE)

gg3 <- ggplot(data = my_fit,
             aes(x = stack,
                 y = reach,
                 color = style,
                 label = model)) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE)
girafe(ggobj = gg1)

Figure 4.1: Hover over points to identify model and frame size

girafe(ggobj = gg2)

Figure 4.2: Hover over points to identify model and frame size

girafe(ggobj = gg3)

Figure 4.3: Hover over points to identify model and frame size

4.2 Rear-center and Front-center

Notes

  1. Rear-center and front-center here are the horizontal components. Combined, the two sum to the wheelbase.
gg1 <- ggplot(data = geobike,
             aes(x = front_center,
                 y = rear_center,
                 color = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size,
                             shape = model),
                         show.legend = FALSE) +
  scale_shape_manual(values = shape_map)

nudge_pos <- nudge_percent * (max(my_fit$front_center) -
                                min(my_fit$front_center))
gg2 <- ggplot(data = my_fit,
             aes(x = front_center,
                 y = rear_center,
                 color = model,
                 label = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE)

gg3 <- ggplot(data = my_fit,
             aes(x = front_center,
                 y = rear_center,
                 color = style,
                 label = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE)
girafe(ggobj = gg1)

Figure 4.4: Hover over points to identify model and frame size

girafe(ggobj = gg2)

Figure 4.5: Hover over points to identify model and frame size

girafe(ggobj = gg3)

Figure 4.6: Hover over points to identify model and frame size

nudge_pos <- nudge_percent * (max(my_fit$front_wheelbase) -
                                min(my_fit$front_wheelbase))
gg4 <- ggplot(data = my_fit,
             aes(x = front_wheelbase,
                 y = stack_reach,
                 color = style,
                 label = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE)
girafe(ggobj = gg4)

4.3 Seat Tube Angle and Head Tube Angle

y_cols <- c("seat_tube_angle", "stack", "reach", "rear_center", "front_center", "head_tube_angle")

ggpairs(geobike[, .SD, .SDcols = y_cols])

gghistogram(data = my_fit,
            x = "seat_tube_angle",
            color = "style",
            fill = "style")
gg1 <- ggplot(data = geobike,
             aes(x = head_tube_angle,
                 y = seat_tube_angle,
                 color = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size,
                             shape = model),
                         show.legend = FALSE) +
  scale_shape_manual(values = shape_map)

nudge_pos <- nudge_percent * (max(my_fit$head_tube_angle) -
                                min(my_fit$head_tube_angle))
gg2 <- ggplot(data = my_fit,
             aes(x = head_tube_angle,
                 y = seat_tube_angle,
                 color = model,
                 label = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE)

gg3 <- ggplot(data = my_fit,
             aes(x = head_tube_angle,
                 y = seat_tube_angle,
                 color = style,
                 label = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE)

nudge_pos <- nudge_percent * (max(my_fit$rear_wheelbase) -
                                min(my_fit$rear_wheelbase))
gg4 <- ggplot(data = my_fit,
             aes(x = rear_wheelbase,
                 y = seat_tube_angle,
                 color = style,
                 label = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE)
girafe(ggobj = gg1)

Figure 4.7: Hover over points to identify model and frame size

girafe(ggobj = gg2)

Figure 4.8: Hover over points to identify model and frame size

girafe(ggobj = gg3)

Figure 4.9: Hover over points to identify model and frame size

girafe(ggobj = gg4)

Figure 4.10: Hover over points to identify model and frame size

4.4 ratios

nudge_pos <- nudge_percent * (max(my_fit$rear_wheelbase) -
                                min(my_fit$rear_wheelbase))
gg1 <- ggplot(data = my_fit,
             aes(x = front_wheelbase,
                 y = stack_reach,
                 color = style,
                 label = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE)

nudge_pos <- nudge_percent * (max(my_fit$rear_wheelbase) -
                                min(my_fit$rear_wheelbase))
gg2 <- ggplot(data = my_fit,
             aes(x = front_wheelbase,
                 y = seat_tube_angle/head_tube_angle,
                 color = style,
                 label = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE)

nudge_pos <- nudge_percent * (max(my_fit$stack_reach) -
                                min(my_fit$stack_reach))
gg3 <- ggplot(data = my_fit,
             aes(x = stack_reach,
                 y = sta_hta,
                 color = style,
                 label = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE)
girafe(ggobj = gg1)
girafe(ggobj = gg2)
girafe(ggobj = gg3)

4.5 Head Tube Angle vs. Fork Offset

Notes

  1. Head Tube Angle, Fork Offset, and Head Tube length are frame geometry contributions to trail but also affect toe-overlap in small bikes, especially with wide tires. I didn’t include trail in these analysis because it is a function of wheel plus tire diameter. I could use the spec’d wheel and tire and add this.
gg1 <- ggplot(data = geobike,
             aes(x = head_tube_angle,
                 y = fork_offset_rake,
                 color = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size,
                             shape = model),
                         show.legend = FALSE) +
  scale_shape_manual(values = shape_map)

nudge_pos <- nudge_percent * (max(my_fit$head_tube_angle) -
                                min(my_fit$head_tube_angle))
gg2 <- ggplot(data = my_fit,
             aes(x = head_tube_angle,
                 y = fork_offset_rake,
                 color = model,
                 label = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE)

gg3 <- ggplot(data = my_fit,
             aes(x = head_tube_angle,
                 y = fork_offset_rake,
                 color = style,
                 label = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE)

girafe(ggobj = gg1)

Figure 4.11: Hover over points to identify model and frame size

girafe(ggobj = gg2)

Figure 4.11: Hover over points to identify model and frame size

girafe(ggobj = gg3)

Figure 4.11: Hover over points to identify model and frame size

5 PCA

Principal Component Analysis is a cheap way of exploring similarity of bike frames through different 2D views of a multidimensional space.

5.1 Coordinates

Coordinates are unscaled and centered at the intersection of the bottom bracket chord and the wheelbase chord.

y_cols <- c("rear_xs", "rear_ys",
            # seat_ys is redundant with head_ys
            "seat_xs",
            "head_xs", "head_ys",
            "crown_xs", "crown_ys",
            # front_ys is redundant with rear_ys
            "front_xs",
            "bottom_xs", "bottom_ys")
y_labs <- c("Rear wheel X", "Rear wheel Y",
            "Seat X",
            "Head tube X", "Head tube Y",
            "Fork Crown X", "Fork Crown Y",
            "Front wheel X",
            "Bottom Bracket X", "Bottom Bracket Y")

y_cols <- c("rear_x",
            "seat_x",
            "head_x", "head_y",
            "crown_x", "crown_y",
            "front_x",
            "bottom_y")
y_labs <- c("Rear wheel X",
            "Seat X",
            "Head tube X", "Head tube Y",
            "Fork Crown X", "Fork Crown Y",
            "Front wheel X",
            "Bottom Bracket Y")


geobike_subset <- geobike[my_fit == TRUE]
X <- geobike_subset[, .SD, .SDcols = y_cols] %>%
  as.matrix()

S <- cov(X)

geo_eigen <- eigen(S)

L <- geo_eigen$values
E <- geo_eigen$vector
scores <- X %*% E
pc1 <- scores[, 1]
pc2 <- scores[, 2]
pc3 <- scores[, 3]
geobike_subset[, pc1 := pc1]
geobike_subset[, pc2 := pc2]
geobike_subset[, pc3 := pc3]

coord_loadings <- cor(cbind(scores[,1:3], X))[-(1:3), 1:3]
row.names(coord_loadings) <- y_labs
table_cap <- "Correlations (or loadings) between PCs and coordinates centered at the bottom bracket with bike facing in positive X direction (right). Positive X correlations indicate higher PC is toward bike front. Positive Y correlations indicate higher PC is higher (more upward)."
coord_loadings %>%
  kable(digits = 2,
        caption = table_cap) %>%
  kable_styling(full_width = FALSE)
Table 5.1: Correlations (or loadings) between PCs and coordinates centered at the bottom bracket with bike facing in positive X direction (right). Positive X correlations indicate higher PC is toward bike front. Positive Y correlations indicate higher PC is higher (more upward).
Rear wheel X -0.46 -0.35 -0.18
Seat X -0.60 -0.36 -0.48
Head tube X 0.33 -0.90 -0.14
Head tube Y 0.89 0.28 0.35
Fork Crown X 0.32 -0.93 0.17
Fork Crown Y 0.88 0.41 -0.25
Front wheel X 0.84 -0.52 -0.07
Bottom Bracket Y 0.19 0.30 0.06
gg1 <- ggplot(data = geobike_subset,
              aes(x = pc1,
                  y = pc2,
                  color = model,
                  shape = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),                          show.legend = FALSE) +
  scale_shape_manual(values = shape_map) +
  coord_fixed()

gg1b <- ggplot(data = geobike_subset,
              aes(x = pc1,
                  y = pc2,
                  color = style,
                  shape = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),                          show.legend = FALSE) +
  scale_shape_manual(values = shape_map) +
  coord_fixed()

gg2 <- ggplot(data = geobike_subset,
              aes(x = pc1,
                  y = pc3,
                  color = model,
                  shape = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),                          show.legend = FALSE) +
  scale_shape_manual(values = shape_map) +
  coord_fixed()

gg2b <- ggplot(data = geobike_subset,
              aes(x = pc1,
                  y = pc3,
                  color = style,
                  shape = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),                          show.legend = FALSE) +
  scale_shape_manual(values = shape_map) +
  coord_fixed()

gg3 <- ggplot(data = geobike_subset,
              aes(x = pc2,
                  y = pc3,
                  color = model,
                  shape = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),                          show.legend = FALSE) +
  scale_shape_manual(values = shape_map) +
  coord_fixed()

gg3b <- ggplot(data = geobike_subset,
              aes(x = pc2,
                  y = pc3,
                  color = style,
                  shape = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),                          show.legend = FALSE) +
  scale_shape_manual(values = shape_map) +
  coord_fixed()

Notes

  1. High PC1 describes a frame with a long front center (without long reach) and high stack.
  2. High PC2 describes a frame with small reach.
girafe(ggobj = gg1b)

Figure 5.1: Hover over points to identify model and frame size

Notes

  1. High PC2 describes a frame with small reach.
  2. PC3 describes noise
girafe(ggobj = gg2b)

Figure 5.2: Hover over points to identify model and frame size

Notes

  1. High PC2 describes a frame with high stack and a slack head tube.
  2. PC3 describes noise
girafe(ggobj = gg3b)

Figure 5.3: Hover over points to identify model and frame size

5.2 Traditional measures and angles

Notes:

  1. PCA using the centered and scaled measures used to compute the dendrogram above and the classification.
y_cols <- c("stack", "reach", "front_center", "rear_center", "bottom_bracket_drop", "fork_offset_rake", "head_tube_angle", "seat_tube_angle")
y_labs <- c("stack", "reach", "front center", "rear center", "bottom bracket drop", "fork offset", "head tube angle", "seat tube angle")

geobike_subset <- geobike[my_fit == TRUE]
X <- geobike_subset[, .SD, .SDcols = y_cols] %>%
  scale()

S <- cov(X)

geo_eigen <- eigen(S)

L <- geo_eigen$values
E <- geo_eigen$vector
scores <- X %*% E
geobike_subset[, pc1 := scores[, 1]]
geobike_subset[, pc2 := scores[, 2]]
geobike_subset[, pc3 := scores[, 3]]


coord_loadings <- cor(cbind(scores[,1:3], X))[-(1:3), 1:3]
row.names(coord_loadings) <- y_labs
table_cap <- "Correlations (or loadings) between PCs and traditional frame measures."

coord_loadings %>%
  kable(digits = 2,
        caption = table_cap) %>%
  kable_styling(full_width = FALSE)
Table 5.2: Correlations (or loadings) between PCs and traditional frame measures.
stack -0.69 -0.48 -0.15
reach -0.58 0.69 -0.02
front center -0.97 0.15 -0.05
rear center -0.41 -0.67 0.02
bottom bracket drop -0.04 0.30 0.80
fork offset -0.12 -0.48 0.65
head tube angle 0.90 0.03 -0.04
seat tube angle -0.23 0.57 0.07
gg1 <- ggplot(data = geobike_subset,
              aes(x = pc1,
                  y = pc2,
                  color = style,
                  shape = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  scale_shape_manual(values = shape_map) +
  coord_fixed()

gg2 <- ggplot(data = geobike_subset,
              aes(x = pc1,
                  y = pc3,
                  color = style,
                  shape = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  scale_shape_manual(values = shape_map) +
  coord_fixed()

gg3 <- ggplot(data = geobike_subset,
              aes(x = pc2,
                  y = pc3,
                  color = style,
                  shape = model)) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE) +
  scale_shape_manual(values = shape_map) +
  coord_fixed()
girafe(ggobj = gg1)

Figure 5.4: Hover over points to identify model and frame size

Notes

  1. High PC1 describes a frame with a short front-center, steep head-angle, and short stack and reach.
  2. High PC2 describes a frame with long reach, a small rear-center, and high seat tube angle
girafe(ggobj = gg2)

Figure 5.5: Hover over points to identify model and frame size

Notes

  1. High PC1 describes a frame with a long front-center, slack head-angle, and high stack.
  2. High PC3 describes a frame with a large bottom bracket drop (a low bottom bracket)
girafe(ggobj = gg3)

Figure 5.6: Hover over points to identify model and frame size

Notes

  1. High PC2 describes a frame with long reach, a small rear-center, and high seat tube angle
  2. High PC3 describes a frame with a small bottom bracket drop (a high bottom bracket)

6 Style Re-classification

6.1 Traditional measures – reduced set

y_cols <- c("stack", "reach", "front_center", "rear_center", "head_tube_angle", "seat_tube_angle")
var_labels <- c("Stack", "Reach",
                "Front-center horizontal",
                "Rear-center horizontal",
                "Head tube angle", "Seat tube angle")
data.table(
  Variables = var_labels
) %>%
  kable() %>%
  kable_styling(full_width = FALSE)
Variables
Stack
Reach
Front-center horizontal
Rear-center horizontal
Head tube angle
Seat tube angle
y_cols <- c("stack", "reach", "front_center", "rear_center", "head_tube_angle", "seat_tube_angle")

geobike_subset <- geobike[my_fit == TRUE,]
scale_it <- TRUE
center_it <- TRUE
dendro_v2 <- treed(geobike_subset,
                y_cols,
                scale_it,
                center_it,
                hclust_method = "ward.D2") #ward.D2
gg <- ggdendrogram(dendro_v2)
 
gg

Notes

  1. Method – Ward’s method using centered/scaled measures of frames spec’d for my height
  2. Three major clusters, from left to right
  • trail: drop-bar mtn bikes and flat-bar gravel bikes
  • all-road and race gravel
  • bikepacking

6.2 Style re-classification table

options(knitr.kable.NA = '')
n_clusters <- 3 
class_dendro <- cutree(dendro_v2, k = n_clusters)
cluster_labels <- c("Trail", "Bikepacking", "All-road")
#cluster_labels <- c("Trail", "Trail", "Bikepacking", "All-road")
labels <- str_split_fixed(names(class_dendro), ",", 2)[,1]
style_class <- data.table(
  model = labels,
  restyle = cluster_labels[class_dendro]
)

# add style to geobike
geobike <- plyr::join(geobike, style_class, by = "model")
my_fit <- geobike[my_fit == TRUE,]

# dcast(setDT(DF), rowid(ID) ~ ID, value.var = "total")
cluster_labels <- c("All-road", "Bikepacking", "Trail")

style_table <-dcast(setDT(style_class), rowid(restyle) ~ restyle, value.var = "model")[, .SD, .SDcols = cluster_labels]

style_table %>%
  kable() %>%
  kable_styling(full_width = FALSE)
All-road Bikepacking Trail
Trek Boone 6 Mason InSearchOf Breezer Radar X Pro
Trek Checkpoint SL5 Tumbleweed Stargazer Evil Chamois Hagar GRX
Ribble Gravel SL Tout Terrain Scrambler 28 BMC URS One
Lauf Siegla Ritchey Outback frameset Surly Ghost Grappler
No22 Drifter X Bombtrack Beyond 2 Knolly Cache Steel
Niner RLT 9 RDO Salsa Vaya Specialized Diverge Evo
OPEN U.P. Salsa Fargo front dropout Marin DSX 2
Thesis OB1 Salsa Fargo rear dropout Whyte Friston Gravel
All-City Gorilla Monsoon Kona Sutra ULTD Enigma Escape Flat-bar
All-City Cosmic Stallion Cinelli Hobootleg Geo Merida Silex
Noble GX 5 BlackMtnCy La Cabra Fiftyone Assassin long-low
BlackMtnCy Monstercross V5 Salsa Cutthroat Fiftyone Assassin short-hi
Chumba Terlingua steel fdo Moots Routt ESC Revel Rover
Shand Stooshie Light Blue Darwin Kanzo Adventure New
Salsa Warbird Reeb Sams Pants Bombtrack Beyond+ Adv
Pinarello Grevil F Genesis Vagabond Amigo Bug Out
Canyon Grail 7 1by Bearclaw Beaux Jaxon Hudski Doggler Gravel
Canyon Grizl 7 1by Chiru Kegeti Sonder Camino AL
Obed Boundary Panorama Taiga EXP Otso Fenrir
Solace OM-3 Short Otso Warakin Stainless Alchemy Rogue
Santa Cruz Stigmata Mosaic GT-1X
Why R+ V4 Cotic Cascade
Specialized Diverge Chumba Yaupon
Bombtrack Hook BMC URS AL
Squid Gravtron BMC URS AL SUS
Blackheart All Road TI Nordest Kutxo
Cervelo Aspero

6.3 Pairwise V2

nudge_pos <- nudge_percent * (max(my_fit$stack) -
                                min(my_fit$stack))
gg1 <- ggplot(data = my_fit,
             aes(x = stack,
                 y = reach,
                 color = restyle,
                 label = model)) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE)

gg2 <- ggplot(data = my_fit,
             aes(x = front_center,
                 y = rear_center,
                 color = restyle,
                 label = model)) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE)

nudge_pos <- nudge_percent * (max(my_fit$head_tube_angle) -
                                min(my_fit$head_tube_angle))
gg3 <- ggplot(data = my_fit,
             aes(x = head_tube_angle,
                 y = seat_tube_angle,
                 color = restyle,
                 label = model)) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE)
girafe(ggobj = gg1)
girafe(ggobj = gg2)
girafe(ggobj = gg3)

6.4 Ratio measures

y_cols <- c("stack_reach", "front_wheelbase", "sta_hta")
var_labels <- c("Stack:Reach",
                "Front-center:Wheelbase",
                "STA:HTA")
data.table(
  Variables = var_labels
) %>%
  kable() %>%
  kable_styling(full_width = FALSE)
Variables
Stack:Reach
Front-center:Wheelbase
STA:HTA
y_cols <- c("stack_reach", "front_wheelbase", "sta_hta")

geobike_subset <- geobike[my_fit == TRUE,]
scale_it <- TRUE
center_it <- TRUE
dendro_v2_ratios <- treed(geobike_subset,
                y_cols,
                scale_it,
                center_it,
                hclust_method = "ward.D2") #ward.D2
gg <- ggdendrogram(dendro_v2_ratios)
 
gg

6.5 ratios v2

nudge_pos <- nudge_percent * (max(my_fit$rear_wheelbase) -
                                min(my_fit$rear_wheelbase))
gg1 <- ggplot(data = my_fit,
             aes(x = front_wheelbase,
                 y = stack_reach,
                 color = restyle,
                 label = model)) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE)

nudge_pos <- nudge_percent * (max(my_fit$rear_wheelbase) -
                                min(my_fit$rear_wheelbase))
gg2 <- ggplot(data = my_fit,
             aes(x = front_wheelbase,
                 y = seat_tube_angle/head_tube_angle,
                 color = restyle,
                 label = model)) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE)

nudge_pos <- nudge_percent * (max(my_fit$stack_reach) -
                                min(my_fit$stack_reach))
gg3 <- ggplot(data = my_fit,
             aes(x = stack_reach,
                 y = sta_hta,
                 color = restyle,
                 label = model)) +
  geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE) +
  geom_point_interactive(aes(tooltip = model_size,
                             data_id = model_size),
                         show.legend = FALSE)
girafe(ggobj = gg1)
girafe(ggobj = gg2)
girafe(ggobj = gg3)